********************************************************************************
*																		       *
* 							VAP								 				   *
*								     									       *
********************************************************************************

* -----> This do-file: Correlation between Twitter engagement and traditional polls

********************************************************************************

* Session setup. Keep this order: -clear all- empties memory first, and the
* limits are raised afterwards (NOTE(review): -set maxvar- is expected to be
* refused while a dataset is in memory -- confirm for the Stata version used).
* Do not pause the output at --more-- prompts.
set more off
* Start from an empty session.
clear all
* Raise the matrix-size limit.
set matsize 3000
* Raise the limit on the number of variables.
set maxvar 10000

************************************


* Paths
* Three globals point at the Data, Figures and Tables folders of the project.
* The two listed accounts use the per-user Dropbox folder under C:/Users;
* every other account falls back to the Dropbox folder under /Users/JNG.
if inlist("`c(username)'", "Juan S. Morales", "jmorales") {
	global PathData "C:/Users/`c(username)'/Dropbox/adamowicz/4_Draft/CPS_FINAL_submission/Data/"
	global PathFig "C:/Users/`c(username)'/Dropbox/adamowicz/4_Draft/CPS_FINAL_submission/Figures/"
	global PathTab "C:/Users/`c(username)'/Dropbox/adamowicz/4_Draft/CPS_FINAL_submission/Tables/"
}
else {
	global PathData "/Users/JNG/Dropbox/adamowicz/4_Draft/CPS_FINAL_submission/Data/"
	global PathFig "/Users/JNG/Dropbox/adamowicz/4_Draft/CPS_FINAL_submission/Figures/"
	global PathTab "/Users/JNG/Dropbox/adamowicz/4_Draft/CPS_FINAL_submission/Tables/"
}

capture program drop save_coef
* save_coef <k>
*   Turns the most recent estimation results into a small dataset and writes
*   it to ${PathData}coef<k>.dta. The file holds two variables:
*     coef<k>  - each non-constant coefficient plus the constant (_cons)
*     monthD   - running row number 1, 2, ... in the order regsave lists them
*   The data in memory are left untouched (preserve/restore).
*   Requires the user-written command -regsave-.
*   NOTE(review): monthD is the row position, not the month level taken from
*   the coefficient name. If a month has no observations in the estimation
*   sample the index presumably shifts -- verify against the poll file.
program define save_coef
	args suffix
	preserve
		* Replace the data in memory with the table of estimates
		regsave
		* Pick up the constant of the regression
		summarize coef if var == "_cons"
		local intercept = r(mean)
		* Add the constant to every coefficient, then discard its own row
		replace coef = coef + `intercept'
		keep if var != "_cons"
		* Index the remaining rows and keep only what gets merged later
		generate monthD = _n
		rename coef coef`suffix'
		keep coef`suffix' monthD
		* Sorted on the merge key before saving
		sort monthD
		save "${PathData}coef`suffix'.dta", replace
	restore
end

* Poll series: read the spreadsheet (first row holds the variable names),
* number the rows 1, 2, ... as monthD, and store the result sorted on that key
* so it can be merged with the coefficient files.
import excel using "${PathData}cbos_2017_2019.xlsx", sheet("Sheet1") firstrow clear
generate monthD = _n
sort monthD
save "${PathData}cbos_m.dta", replace

* Tweet-level data: build account ids, calendar variables and the
* log-engagement outcomes used in the regressions below.
use "${PathData}tweets_poland.dta", clear

* Numeric account identifier (absorbed as a fixed effect later)
encode screen_name, gen(user_id)

* Calendar components cut out of created_at by position: characters 1-10 are
* taken as the date (year 1-4, month 6-7, day 9-10), characters 12-13 as the
* hour. NOTE(review): assumes created_at looks like "YYYY-MM-DD HH:..." --
* confirm against the data.
gen date = substr(created_at,1,10)
gen month = substr(date,6,2)
gen year = substr(date,1,4)
gen day = substr(date,9,2)
gen hour = substr(created_at,12,2)
destring month year day hour, replace

* Month counter: January 2012 = 1
gen time = (year-2012)*12 + month

* Day counter: 1 January 2012 = 1.
* Computed from Stata dates so that every leap day is counted. The previous
* hand-written formula only added the 2012 leap day, which left all dates
* from 1 March 2016 onwards (i.e. the whole retained sample) one day short.
gen timeD = mdy(month, day, year) - mdy(1, 1, 2012) + 1
gen week = round(timeD/7)
gen date2 = mdy(month, day, year)

* Same month counter as -time-; this is the variable used as the month
* factor in the regressions and as the merge key
gen monthD = (year-2012)*12 + month

* Restrict the sample to 2017 onwards
keep if year>2016

* Outcomes: log(1 + count)
gen log_engagement = log(favcount + rt_count + 1)
gen log_rt = log(rt_count + 1)
gen log_fav = log(favcount + 1)

* Month effects of log engagement with account fixed effects, estimated
* separately for government (saved as coef1) and opposition (saved as coef2).
local slot = 0
foreach side in government opposition {
	local ++slot
	areg log_engagement i.monthD if `side', absorb(user_id)
	save_coef `slot'
}

* Engagement vs. polls: merge the monthly coefficients with the poll series,
* detrend, standardise, plot (Figure1.png) and tabulate the lead/lag
* regressions (TableA6.tex). The tweet data are preserved and restored around
* this section.
preserve

* Poll series plus the two coefficient files, one row per monthD.
* -merge 1:1- replaces the deprecated -merge varlist using- form: it needs no
* pre-sorting and stops if monthD is not unique in either file.
use "${PathData}cbos_m.dta", clear
merge 1:1 monthD using "${PathData}coef1.dta", nogenerate
merge 1:1 monthD using "${PathData}coef2.dta", nogenerate

tsset monthD

**detrend**
* Residuals from a linear trend in monthD, for both coefficient series and
* both poll series
foreach v in coef1 coef2 pis po {
	reg `v' monthD
	predict `v'DT, residuals
}

* Standardised gaps between the two detrended series
gen diffPOLLS = pisDT-poDT
egen diffPOLLS_z = std(diffPOLLS)

gen diffTWEET = coef1DT - coef2DT
egen diffTWEET_z = std(diffTWEET)

* Contemporaneous and one-month-lag regressions (log output only)
reg diffPOLLS_z diffTWEET_z
reg diffPOLLS_z l.diffTWEET_z

* Monthly date for the time axis; year and month come from the poll file
gen date = ym(year,month)
label variable diffTWEET_z "Follower engagement"
label variable diffPOLLS_z "Polls avg."
label variable date "Date"

tsset date, month
set scheme sj
twoway ///
	(scatter diffTWEET_z date, graphregion(color(white)) xsize(6) connect(yes) mcolor(ebblue) lcolor(ebblue)) ///
	(scatter diffPOLLS_z date, connect(yes) mcolor(cranberry) msymbol(triangle) lcolor(cranberry) )

graph export "${PathFig}Figure1.png", width(3600) as(png) replace

* Lead/lag regressions, stored as est1-est9 in the order
* f4, f3, f2, f1, contemporaneous, l1, l2, l3, l4
est clear
forvalues k = 4(-1)1 {
	eststo: reg diffPOLLS_z f`k'.diffTWEET_z, robust
}
eststo: reg diffPOLLS_z diffTWEET_z, robust
forvalues k = 1/4 {
	eststo: reg diffPOLLS_z l`k'.diffTWEET_z, robust
}

esttab est1 est2 est3 est4 est5 est6 est7 est8 est9 using "${PathTab}TableA6.tex", ///
	se star(* 0.1 ** 0.05 *** 0.01) varlabels("Follower engagement") ///
	stats(N r2, labels("N" "R2")) label ///
	title("Correlation between approval polls and Twitter engagement} \scriptsize {") ///
	replace nonotes noconstant nomtitles postfoot(" ")

restore
 

***Using only likes***
* Same month-effect regressions with log likes as the outcome; the results
* overwrite coef1.dta (government) and coef2.dta (opposition).
local slot = 0
foreach side in government opposition {
	local ++slot
	areg log_fav i.monthD if `side', absorb(user_id)
	save_coef `slot'
}


* Likes vs. polls: same steps as the engagement analysis, using the
* coefficient files just written from the likes regressions; the output is
* TableA7.tex. The data in memory are replaced and not restored afterwards.

* Poll series plus the two coefficient files, one row per monthD.
* -merge 1:1- replaces the deprecated -merge varlist using- form: it needs no
* pre-sorting and stops if monthD is not unique in either file.
use "${PathData}cbos_m.dta", clear
merge 1:1 monthD using "${PathData}coef1.dta", nogenerate
merge 1:1 monthD using "${PathData}coef2.dta", nogenerate

tsset monthD

**detrend**
* monthD_sq is not used by anything visible below; it is kept because the
* dataset stays in memory after this section.
gen monthD_sq = monthD*monthD
* Residuals from a linear trend in monthD, for both coefficient series and
* both poll series
foreach v in coef1 coef2 pis po {
	reg `v' monthD
	predict `v'DT, residuals
}

* Standardised gaps between the two detrended series
gen diffPOLLS = pisDT-poDT
egen diffPOLLS_z = std(diffPOLLS)

gen diffTWEET = coef1DT - coef2DT
egen diffTWEET_z = std(diffTWEET)

* Contemporaneous and one-month-lag regressions (log output only)
reg diffPOLLS_z diffTWEET_z
reg diffPOLLS_z l.diffTWEET_z

* Monthly date built from the year and month columns of the poll file
gen date = ym(year,month)
label variable diffTWEET_z "Follower engagement"
label variable diffPOLLS_z "Polls avg."
label variable date "Date"

* Lead/lag regressions, stored as est1-est9 in the order
* f4, f3, f2, f1, contemporaneous, l1, l2, l3, l4
est clear
forvalues k = 4(-1)1 {
	eststo: reg diffPOLLS_z f`k'.diffTWEET_z, robust
}
eststo: reg diffPOLLS_z diffTWEET_z, robust
forvalues k = 1/4 {
	eststo: reg diffPOLLS_z l`k'.diffTWEET_z, robust
}

esttab est1 est2 est3 est4 est5 est6 est7 est8 est9 using "${PathTab}TableA7.tex", ///
	se star(* 0.1 ** 0.05 *** 0.01) varlabels("Follower engagement") ///
	stats(N r2, labels("N" "R2")) label ///
	title("Correlation between approval polls and Twitter likes} \scriptsize {") ///
	replace nonotes noconstant nomtitles postfoot(" ")

